Added regular expressions

author Jeroen van der Heijden <jeroen@transceptor.technology>

Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)

committer Jeroen van der Heijden <jeroen@transceptor.technology>

Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)
author Jeroen van der Heijden <jeroen@transceptor.technology>
Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)
committer Jeroen van der Heijden <jeroen@transceptor.technology>
Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)
diff --git a/ChangeLog-2.0.29 b/ChangeLog-2.0.29

new file mode 100644 (file)

index 0000000..40eddec
--- /dev/null
+++ b/ChangeLog-2.0.29
@@ -0,0 +1 @@
+  * Added filtering of log series using regular expressions. (issue #100)
diff --git a/grammar/gogrammar/grammar.go b/grammar/gogrammar/grammar.go

index 3f5f2d1ef003fd63096bd348d457caa0f8c3265a..eb74c3920bb44f91d8978cc1f3646380118e7f9c 100644 (file)
--- a/grammar/gogrammar/grammar.go
+++ b/grammar/gogrammar/grammar.go
@@ -4,7 +4,7 @@ package grammar
  // should be used with the goleri module.
  //
  // Source class: SiriGrammar
-// Created at: 2018-06-14 16:27:16
+// Created at: 2018-06-22 15:10:04
  
  import (
         "regexp"
@@ -1141,6 +1141,7 @@ func SiriGrammar() *goleri.Grammar {
                         string,
                         rInteger,
                         rFloat,
+                       rRegex,
                         kNan,
                         kInf,
                         kNinf,
diff --git a/grammar/grammar.py b/grammar/grammar.py

index fb96d8d369fdb2ab033a7f44c5d0779d90248611..c01d780d7ebf1ae8100b314985d5d6ea29344c0f 100644 (file)
--- a/grammar/grammar.py
+++ b/grammar/grammar.py
@@ -497,6 +497,7 @@ class SiriGrammar(Grammar):
              string,
              r_integer,
              r_float,
+            r_regex,
              k_nan,
              k_inf,
              k_ninf,
diff --git a/include/siri/db/aggregate.h b/include/siri/db/aggregate.h

index c758aedaedf79b3a1ef424870fab2853d703c64e..95ea715bdb03938592b5f44c93773dd54049b9f9 100644 (file)
--- a/include/siri/db/aggregate.h
+++ b/include/siri/db/aggregate.h
@@ -16,6 +16,7 @@
  #include <slist/slist.h>
  #include <cexpr/cexpr.h>
  #include <qpack/qpack.h>
+#include <pcre2.h>
  
  typedef struct siridb_point_s siridb_point_t;
  typedef struct siridb_points_s siridb_points_t;
@@ -29,6 +30,8 @@ typedef struct siridb_aggr_s
      uint64_t limit;
      uint64_t offset;
      double timespan;  // used for derivative
+    pcre2_code * regex;             \
+    pcre2_match_data * match_data;
      qp_via_t filter_via;
  } siridb_aggr_t;
  
diff --git a/include/siri/grammar/grammar.h b/include/siri/grammar/grammar.h

index 2875b76c9cc0ea808b3d5ecd397ab5c8f0a2cc3c..bd6c2dc83435372fcab9514bb09e13849abf2a0f 100644 (file)
--- a/include/siri/grammar/grammar.h
+++ b/include/siri/grammar/grammar.h
@@ -5,7 +5,7 @@
   * should be used with the libcleri module.
   *
   * Source class: SiriGrammar
- * Created at: 2018-06-14 16:27:16
+ * Created at: 2018-06-22 15:10:04
   */
  #ifndef CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_
  #define CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_
diff --git a/include/siri/parser/queries.h b/include/siri/parser/queries.h

index 23de3313fac937dd4020f6dd22e7a719740a3bdb..e40bd45c766fe1da8e8e5a13ff0ddab1e92f80e7 100644 (file)
--- a/include/siri/parser/queries.h
+++ b/include/siri/parser/queries.h
@@ -22,6 +22,7 @@
  #include <siri/db/group.h>
  #include <siri/db/series.h>
  #include <siri/db/user.h>
+#include <pcre2.h>
  
  #define QUERIES_IGNORE_DROP_THRESHOLD 1
  #define QUERIES_SKIP_GET_POINTS 2
diff --git a/include/siri/version.h b/include/siri/version.h

index bf227c3a232c3db4b6e20323177c90c8d30a290d..776eaf58ea0f5c919b24088066da98c899c549f6 100644 (file)
--- a/include/siri/version.h
+++ b/include/siri/version.h
@@ -13,7 +13,7 @@
  
  #define SIRIDB_VERSION_MAJOR 2
  #define SIRIDB_VERSION_MINOR 0
-#define SIRIDB_VERSION_PATCH 28
+#define SIRIDB_VERSION_PATCH 29
  
  #define SIRIDB_STRINGIFY(num) #num
  #define SIRIDB_VERSION_STR(major,minor,patch) \
diff --git a/src/siri/db/aggregate.c b/src/siri/db/aggregate.c

index 898c0541899b3063168cccb905e79026996dc9cd..2fdfd78d38c36ed1563a07bf3463bb0ef77f3e16 100644 (file)
--- a/src/siri/db/aggregate.c
+++ b/src/siri/db/aggregate.c
@@ -16,6 +16,7 @@
  #include <siri/db/median.h>
  #include <siri/db/variance.h>
  #include <siri/grammar/grammar.h>
+#include <siri/db/re.h>
  #include <slist/slist.h>
  #include <stddef.h>
  #include <strextra/strextra.h>
@@ -51,6 +52,7 @@ typedef int (* AGGR_cb)(
  static AGGR_cb AGGREGATES[F_OFFSET];
  
  static siridb_aggr_t * AGGREGATE_new(uint32_t gid);
+static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val);
  static void AGGREGATE_free(siridb_aggr_t * aggr);
  static int AGGREGATE_init_filter(
          siridb_aggr_t * aggr,
@@ -536,8 +538,10 @@ static siridb_aggr_t * AGGREGATE_new(uint32_t gid)
      aggr->limit = 0;
      aggr->offset = 0;
      aggr->timespan = 1.0;
-    aggr->filter_tp = TP_INT;  /* when string we must
-                                * malloc/free * aggr->filter_via.raw */
+    aggr->regex = NULL;
+    aggr->match_data = NULL;
+    aggr->filter_via.raw = NULL;
+    aggr->filter_tp = TP_INT;  /* when string we must cleanup more */
      return aggr;
  }
  
@@ -549,6 +553,8 @@ static void AGGREGATE_free(siridb_aggr_t * aggr)
      if (aggr->filter_tp == TP_STRING)
      {
          free(aggr->filter_via.raw);
+        pcre2_code_free(aggr->regex);
+        pcre2_match_data_free(aggr->match_data);
      }
      free(aggr);
  }
@@ -601,6 +607,27 @@ static int AGGREGATE_init_filter(
                  (char *) aggr->filter_via.raw, node->str, node->len);
          return 0;
  
+    case CLERI_GID_R_REGEX:
+        if (aggr->filter_opr != CEXPR_EQ && aggr->filter_opr != CEXPR_NE)
+        {
+            sprintf(err_msg,
+                    "Regular expressions can only be used with 'equal' (==) "
+                    "or 'not equal' (!=) operator.");
+            return -1;
+        }
+        aggr->filter_tp = TP_STRING;
+        /* extract and compile regular expression */
+        if (siridb_re_compile(
+                &aggr->regex,
+                &aggr->match_data,
+                node->str,
+                node->len,
+                err_msg))
+        {
+            return -1;  /* error_msg is set */
+        }
+        return 0;
+
      default:
          assert (0);
          break;
@@ -778,6 +805,20 @@ static siridb_points_t * AGGREGATE_difference(
      return points;
  }
  
+static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val)
+{   // Returns non-zero when val satisfies the regex filter stored in aggr.
+    int ret;  // pcre2_match() result: >= 0 on a match, negative otherwise
+    ret = pcre2_match(
+            aggr->regex,           // compiled pattern
+            (PCRE2_SPTR8) val,     // subject string
+            strlen(val),           // subject length; val must be NUL-terminated
+            0,                     // start offset: match from the beginning of val
+            0,                     // options: none
+            aggr->match_data,      // match data block for the result
+            0);                    // match context: 0 (null) selects PCRE2 defaults
+    return aggr->filter_opr == CEXPR_EQ ? ret >= 0 : ret < 0;  // any other operator is handled as 'not equal'; a negative ret (no match, or presumably a PCRE2 error too) then passes the filter
+}
+
  static siridb_points_t * AGGREGATE_filter(
          siridb_points_t * source,
          siridb_aggr_t * aggr,
@@ -815,7 +856,6 @@ static siridb_points_t * AGGREGATE_filter(
  
      siridb_points_t * points = siridb_points_new(source->len, source->tp);
  
-
      if (points == NULL)
      {
          sprintf(err_msg, "Memory allocation error.");
@@ -832,7 +872,11 @@ static siridb_points_t * AGGREGATE_filter(
                      i < source->len;
                      i++, spt++)
              {
-                if (cexpr_str_cmp(aggr->filter_opr, spt->val.str, value.str))
+                if (value.str != NULL  // NULL is a regular expression
+                        ? cexpr_str_cmp(
+                                aggr->filter_opr,
+                                spt->val.str, value.str)
+                        : AGGREGATE_regex_cmp(aggr, spt->val.str))
                  {
                      dpt->ts = spt->ts;
                      dpt->val.str = strdup(spt->val.str);
diff --git a/src/siri/grammar/grammar.c b/src/siri/grammar/grammar.c

index 1ab80064194fadb7510b51d1cce7c4649bec47a9..00d1ad456ab24833b5f55f38511f550d150f92c5 100644 (file)
--- a/src/siri/grammar/grammar.c
+++ b/src/siri/grammar/grammar.c
@@ -5,7 +5,7 @@
   * should be used with the libcleri module.
   *
   * Source class: SiriGrammar
- * Created at: 2018-06-14 16:27:16
+ * Created at: 2018-06-22 15:10:04
   */
  
  #include "siri/grammar/grammar.h"
@@ -954,10 +954,11 @@ cleri_grammar_t * compile_grammar(void)
          cleri_choice(
              CLERI_NONE,
              CLERI_MOST_GREEDY,
-            6,
+            7,
              string,
              r_integer,
              r_float,
+            r_regex,
              k_nan,
              k_inf,
              k_ninf
diff --git a/test/test_select.py b/test/test_select.py

index 2ed866db90f81ad8d7da4a23bd83601039dfd3ff..6ae91bd12e89513c68568df09023934a1b9d8b2b 100644 (file)
--- a/test/test_select.py
+++ b/test/test_select.py
@@ -3,6 +3,7 @@ import functools
  import random
  import time
  import math
+import re
  from testing import Client
  from testing import default_test_setup
  from testing import gen_data
@@ -214,6 +215,21 @@ class TestSelect(TestBase):
                  [1447253549, 538],
                  [1447254748, 537]]})
  
+        self.assertEqual(
+            await self.client0.query(
+                'select filter(/l.*/) from * where type == string'),
+                {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]})
+
+        self.assertEqual(
+            await self.client0.query(
+                'select filter(==/l.*/) from * where type == string'),
+            {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]})
+
+        self.assertEqual(
+            await self.client0.query(
+                'select filter(!=/l.*/) from * where type == string'),
+            {'log': [p for p in DATA['log'] if not re.match('l.*', p[1])]})
+
          self.assertEqual(
              await self.client0.query('select limit(300, mean) from "aggr"'),
              {'aggr': DATA['aggr']})
@@ -322,6 +338,16 @@ class TestSelect(TestBase):
              await self.client0.query('select difference() from "one"'),
              {'one': []})
  
+        with self.assertRaisesRegexp(
+                QueryError,
+                'Regular expressions can only be used with.*'):
+            await self.client0.query('select filter(~//) from "log"')
+
+        with self.assertRaisesRegexp(
+                QueryError,
+                'Cannot use a string filter on number type.'):
+            await self.client0.query('select filter(//) from "aggr"')
+
          with self.assertRaisesRegexp(
                  QueryError,
                  'Cannot use mean\(\) on string type\.'):
author	Jeroen van der Heijden <jeroen@transceptor.technology>
	Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)
committer	Jeroen van der Heijden <jeroen@transceptor.technology>
	Fri, 22 Jun 2018 14:41:01 +0000 (16:41 +0200)
ChangeLog-2.0.29	[new file with mode: 0644]	patch \| blob
grammar/gogrammar/grammar.go		patch \| blob \| history
grammar/grammar.py		patch \| blob \| history
include/siri/db/aggregate.h		patch \| blob \| history
include/siri/grammar/grammar.h		patch \| blob \| history
include/siri/parser/queries.h		patch \| blob \| history
include/siri/version.h		patch \| blob \| history
src/siri/db/aggregate.c		patch \| blob \| history
src/siri/grammar/grammar.c		patch \| blob \| history
test/test_select.py		patch \| blob \| history